# CLEANING EVERYDAY RPS DATA 
# ALAN YAN
# SEPTEMBER 25, 2020

#### SETUP ####
# Clear the workspace: remove every object that ls() reports in the current
# environment (ls() skips names beginning with ".", so those are kept).
# NOTE(review): this neither detaches packages nor resets options, and it
# deletes whatever else the user had loaded. Restarting the R session gives a
# cleaner slate -- consider dropping this line.
rm(list = ls())

#load libraries
library(pacman)
p_load(tidyverse,
       psych)

# Read the raw survey export into `dt`. The first row supplies the column
# names and text columns stay character (no automatic factor conversion).
dt <- read.csv(
  file = "01-data/raw-everyday-rps-study.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

#### CLEAN ####
# Build the analysis data set `dt.clean` from the raw responses in `dt`.
#
# Steps:
#   1. Recode the six RPS items (rp1..rp6), stored with codes 11-17, to 1-7.
#      Any other value (including NA) becomes NA.
#   2. rps_index: sum of the six recoded items, shifted and divided so that
#      the possible range 6-42 maps onto 0-1.
#   3. Shift/divide the demographic and follow-up items (see the hedged note
#      below about their raw ranges).
#   4. Keep only the derived columns, in the order listed in select().
#   5. Drop every row that is missing any of the six recoded RPS items.
dt.clean <- dt %>%
  mutate(
    # A single recode rule applied to all six items; the outputs are named
    # rp1_n ... rp6_n via the `.names` glue spec.
    across(
      c(rp1, rp2, rp3, rp4, rp5, rp6),
      function(x) {
        case_when(
          x == 11 ~ 1,
          x == 12 ~ 2,
          x == 13 ~ 3,
          x == 14 ~ 4,
          x == 15 ~ 5,
          x == 16 ~ 6,
          x == 17 ~ 7
        )
      },
      .names = "{.col}_n"
    ),
    rps_index = (rp1_n + rp2_n + rp3_n + rp4_n + rp5_n + rp6_n - 6) / 36,
    # NOTE(review): the offsets/divisors below presumably rescale each item to
    # 0-1 (e.g. age coded 2-8, income 1-12, education 1-7, 5-point items 1-5).
    # The raw ranges are not visible in this script -- confirm against the
    # codebook.
    r_age_n = (age - 2) / 6,
    r_income_n = (income - 1) / 11,
    r_edu_n = (education - 1) / 6,
    # sex: 1 -> "Male", 2 -> "Female"; any other code becomes NA.
    r_sex = factor(
      case_when(
        sex == 1 ~ "Male",
        sex == 2 ~ "Female"
      ),
      levels = c("Male", "Female")
    ),
    # linkedfate == 2 scores 0; otherwise the score comes from lfateamt
    # (1, 2, 3 -> 1/3, 2/3, 1). The first matching condition wins; rows that
    # match none are NA.
    linked_fate_n = case_when(
      linkedfate == 2 ~ 0,
      lfateamt == 1 ~ 1 / 3,
      lfateamt == 2 ~ 2 / 3,
      lfateamt == 3 ~ 1
    ),
    idimpt_n = (idimpt - 1) / 4,
    # NOTE(review): the source column is `restbothered1`, unlike `bothered2`
    # and `bothered3` -- verify this is the real column name in the raw file.
    bothered1_n = (restbothered1 - 1) / 4,
    important1_n = (important1 - 1) / 4,
    tell1_n = (tell1 - 1) / 4,
    bothered2_n = (bothered2 - 1) / 4,
    important2_n = (important2 - 1) / 4,
    tell2_n = (tell2 - 1) / 4,
    bothered3_n = (bothered3 - 1) / 4,
    important3_n = (important3 - 1) / 4,
    tell3_n = (tell3 - 1) / 4
  ) %>%
  select(
    rp1_n, rp2_n, rp3_n, rp4_n, rp5_n, rp6_n,
    rps_index,
    r_age_n, r_income_n, r_edu_n, r_sex,
    linked_fate_n,
    idimpt_n,
    bothered1_n, important1_n, tell1_n,
    bothered2_n, important2_n, tell2_n,
    bothered3_n, important3_n, tell3_n
  ) %>%
  drop_na(rp1_n, rp2_n, rp3_n, rp4_n, rp5_n, rp6_n)

#### CHECK FOR RELIABILITY IN RPS ####
# Fit a two-factor maximum-likelihood factor analysis (promax rotation) to the
# six recoded RPS items and keep the fit in `factor.output`.
# NOTE(review): the section header says "reliability", but this step is a
# factor analysis -- no reliability coefficient is computed here.
factor.output <- dt.clean %>%
  select(rp1_n, rp2_n, rp3_n, rp4_n, rp5_n, rp6_n) %>%
  factanal(x = ., factors = 2, rotation = "promax")

# Show every loading (cutoff of 0 hides nothing), rounded to two digits, with
# variables sorted by their loadings.
print(factor.output, digits = 2, cutoff = 0, sort = TRUE)

#### EXPORT ####
# Save the cleaned data in two formats: an R serialized object and a CSV.
# NOTE(review): the RDS path has no ".rds" extension; it is kept as-is because
# other scripts may read this exact path.
write_rds(dt.clean, "01-data/clean-everyday-rps")
# NOTE(review): write.csv() writes row names by default, so the CSV gains a
# leading unnamed index column -- confirm that downstream readers expect it.
write.csv(x = dt.clean, file = "01-data/clean-everyday-rps.csv")
